{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DataFrame and Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.read_excel(\"xl/course_participants.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=[[\"Mark\", 55, \"Italy\", 4.5, \"Europe\"],\n",
    "      [\"John\", 33, \"USA\", 6.7, \"America\"],\n",
    "      [\"Tim\", 41, \"USA\", 3.9, \"America\"],\n",
    "      [\"Jenny\", 12, \"Germany\", 9.0, \"Europe\"]]\n",
    "df = pd.DataFrame(data=data,\n",
    "                  columns=[\"name\", \"age\", \"country\",\n",
    "                           \"score\", \"continent\"],\n",
    "                  index=[1001, 1000, 1002, 1003])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.index.name = \"user_id\"\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# \"reset_index\" turns the index into a column, replacing the\n",
    "# index with the default index. This corresponds to the DataFrame\n",
    "# from the beginning that we loaded from Excel.\n",
    "df.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# \"reset_index\" turns \"user_id\" into a regular column and\n",
    "# \"set_index\" turns the column \"name\" into the index\n",
    "df.reset_index().set_index(\"name\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.reindex([999, 1000, 1001, 1004])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.sort_values([\"continent\", \"age\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns.name = \"properties\"\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.rename(columns={\"name\": \"First Name\", \"age\": \"Age\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.drop(columns=[\"name\", \"country\"],\n",
    "        index=[1000, 1003])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.T  # Shortcut for df.transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc[:, [\"continent\", \"country\", \"name\", \"age\", \"score\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Manipulation\n",
    "## Selecting Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Using scalars for both row and column selection returns a scalar\n",
    "df.loc[1001, \"name\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Using a scalar on either the row or column selection returns a Series\n",
    "df.loc[[1001, 1002], \"age\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Selecting multiple rows and columns returns a DataFrame\n",
    "df.loc[:1002, [\"name\", \"country\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iloc[0, 0]  # Returns a Scalar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iloc[[0, 2], 1]  # Returns a Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iloc[:3, [0, 2]]  # Returns a DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf = (df[\"age\"] > 40) & (df[\"country\"] == \"USA\")\n",
    "tf  # This is a Series with only True/False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc[tf, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc[df.index > 1001, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc[df[\"country\"].isin([\"Italy\", \"Germany\"]), :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This could be the yearly rainfall in millimeters\n",
    "rainfall = pd.DataFrame(data={\"City 1\": [300.1, 100.2],\n",
    "                              \"City 2\": [400.3, 300.4],\n",
    "                              \"City 3\": [1000.5, 1100.6]})\n",
    "rainfall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall < 400"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall[rainfall < 400]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A MultiIndex needs to be sorted\n",
    "df_multi = df.reset_index().set_index([\"continent\", \"country\"])\n",
    "df_multi = df_multi.sort_index()\n",
    "df_multi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_multi.loc[\"Europe\", :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_multi.loc[(\"Europe\", \"Italy\"), :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_multi.reset_index(level=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setting Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy the DataFrame first to leave the original untouched\n",
    "df2 = df.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.loc[1000, \"name\"] = \"JOHN\"\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.loc[[1000, 1001], \"score\"] = [3, 4]\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf = (df2[\"age\"] < 20) | (df2[\"country\"] == \"USA\")\n",
    "df2.loc[tf, \"name\"] = \"xxx\"\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy the DataFrame first to leave the original untouched\n",
    "rainfall2 = rainfall.copy()\n",
    "rainfall2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the values to 0 wherever they are below 400\n",
    "rainfall2[rainfall2 < 400] = 0\n",
    "rainfall2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.replace(\"USA\", \"U.S.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.loc[:, \"discount\"] = 0\n",
    "df2.loc[:, \"price\"] = [49.9, 49.9, 99.9, 99.9]\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = df.copy()  # let's start with a fresh copy\n",
    "df2.loc[:, \"birth year\"] = 2021 - df2[\"age\"]\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Missing Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = df.copy() # let's start with a fresh copy\n",
    "df2.loc[1000, \"score\"] = None\n",
    "df2.loc[1003, :] = None\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.dropna(how=\"all\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.fillna({\"score\": df2[\"score\"].mean()})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Duplicate Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.drop_duplicates([\"country\", \"continent\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"country\"].is_unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"country\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# By default, it marks only duplicates as True, i.e.\n",
    "# without the first occurrence\n",
    "df[\"country\"].duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# To get all rows where \"country\" is duplicated, use\n",
    "# keep=False\n",
    "df.loc[df[\"country\"].duplicated(keep=False), :]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Arithmetic Operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall + 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "more_rainfall = pd.DataFrame(data=[[100, 200], [300, 400]],\n",
    "                             index=[1, 2],\n",
    "                             columns=[\"City 1\", \"City 4\"])\n",
    "more_rainfall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall + more_rainfall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall.add(more_rainfall, fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A Series taken from a row\n",
    "rainfall.loc[1, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall + rainfall.loc[1, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A Series taken from a column\n",
    "rainfall.loc[:, \"City 2\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall.add(rainfall.loc[:, \"City 2\"], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's create a new DataFrame\n",
    "users = pd.DataFrame(data=[\" mArk \", \"JOHN  \", \"Tim\", \" jenny\"],\n",
    "                     columns=[\"name\"])\n",
    "users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "users_cleaned = users.loc[:, \"name\"].str.strip().str.capitalize()\n",
    "users_cleaned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "users_cleaned.str.startswith(\"J\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Applying a Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_string(x):\n",
    "    return f\"{x:,.2f}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Note that we pass in the function without calling it,\n",
    "# i.e., format_string and not format_string()!\n",
    "rainfall.applymap(format_string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall.applymap(lambda x: f\"{x:,.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Combining DataFrames\n",
    "## Concatenating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=[[15, \"France\", 4.1, \"Becky\"],\n",
    "      [44, \"Canada\", 6.1, \"Leanne\"]]\n",
    "more_users = pd.DataFrame(data=data,\n",
    "                          columns=[\"age\", \"country\", \"score\", \"name\"],\n",
    "                          index=[1000, 1011])\n",
    "more_users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.concat([df, more_users], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=[[3, 4],\n",
    "      [5, 6]]\n",
    "more_categories = pd.DataFrame(data=data,\n",
    "                               columns=[\"quizzes\", \"logins\"],\n",
    "                               index=[1000, 2000])\n",
    "more_categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.concat([df, more_categories], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Joining and Merging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame(data=[[1, 2], [3, 4], [5, 6]],\n",
    "                   columns=[\"A\", \"B\"])\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = pd.DataFrame(data=[[10, 20], [30, 40]],\n",
    "                   columns=[\"C\", \"D\"], index=[1, 3])\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.join(df2, how=\"inner\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.join(df2, how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.join(df2, how=\"right\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.join(df2, how=\"outer\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add a column called \"category\" to both DataFrames\n",
    "df1[\"category\"] = [\"a\", \"b\", \"c\"]\n",
    "df2[\"category\"] = [\"c\", \"b\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.merge(df2, how=\"inner\", on=[\"category\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.merge(df2, how=\"left\", on=[\"category\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Aggregation and Descriptive Statistics\n",
    "## Descriptive Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainfall.mean(axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Grouping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.groupby([\"continent\"]).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.groupby([\"continent\", \"country\"]).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.groupby([\"continent\"]).agg(lambda x: x.max() - x.min())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pivoting and Melting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = [[\"Oranges\", \"North\", 12.30],\n",
    "        [\"Apples\", \"South\", 10.55],\n",
    "        [\"Oranges\", \"South\", 22.00],\n",
    "        [\"Bananas\", \"South\", 5.90],\n",
    "        [\"Bananas\", \"North\", 31.30],\n",
    "        [\"Oranges\", \"North\", 13.10]]\n",
    "\n",
    "sales = pd.DataFrame(data=data,\n",
    "                     columns=[\"Fruit\", \"Region\", \"Revenue\"])\n",
    "sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pivot = pd.pivot_table(sales,\n",
    "                       index=\"Fruit\", columns=\"Region\",\n",
    "                       values=\"Revenue\", aggfunc=\"sum\",\n",
    "                       margins=True, margins_name=\"Total\")\n",
    "pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.melt(pivot.iloc[:-1,:-1].reset_index(),\n",
    "        id_vars=\"Fruit\",\n",
    "        value_vars=[\"North\", \"South\"], value_name=\"Revenue\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Plotting\n",
    "## Matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "%matplotlib inline\n",
    "# Or %matplotlib notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.DataFrame(data=np.random.rand(4, 4) * 100000,\n",
    "                    index=[\"Q1\", \"Q2\", \"Q3\", \"Q4\"],\n",
    "                    columns=[\"East\", \"West\", \"North\", \"South\"])\n",
    "data.index.name = \"Quarters\"\n",
    "data.columns.name = \"Region\"\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.plot()  # Shortcut for data.plot.line()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plotly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the plotting backend to Plotly\n",
    "pd.options.plotting.backend = \"plotly\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the same data as bar plot\n",
    "data.plot.bar(barmode=\"group\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Import and Export\n",
    "## Exporting to a CSV file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv(\"course_participants.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Importing a CSV file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft = pd.read_csv(\"csv/MSFT.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# I am selecting a few columns because of space issues\n",
    "# You can also just run: msft.head()\n",
    "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].tail(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.loc[:, [\"Adj Close\", \"Volume\"]].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The line break in the URL is only to make it fit on the page\n",
    "url = (\"https://raw.githubusercontent.com/fzumstein/\"\n",
    "       \"python-for-excel/1st-edition/csv/MSFT.csv\")\n",
    "msft = pd.read_csv(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].head(2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
